#include "driver/platform/platform.h"

#if defined(WORKAROUND_USE_ICU)

#include "driver/utils/unicode_converter.h"

// Tells whether two encoding names compare as equal under ICU's
// ucnv_compareNames(), i.e. whether that comparison yields 0.
bool sameEncoding(const std::string & lhs, const std::string & rhs) {
    const char * const left_name = lhs.c_str();
    const char * const right_name = rhs.c_str();

    const auto comparison = ucnv_compareNames(left_name, right_name);
    return comparison == 0;
}

// Opens an ICU converter for `encoding` and precomputes the signature (BOM) data kept by this object:
//   - encoded_signatures_to_trim_ (and encoded_signatures_to_trim_max_size_): signatures known for the target encoding;
//   - encoded_signature_to_prepend_: the signature the converter itself generates, or, for encodings where
//     detection is forced, the first known signature when the converter generates none;
//   - pivot_signature_to_prepend_ / pivot_signatures_to_trim_ (and pivot_signatures_to_trim_max_size_):
//     the UTF-16 BOM in local byte order, used for the pivot.
// Throws std::runtime_error if the converter cannot be opened or if signature detection fails.
// If anything throws after the converter has been opened, the converter is closed before the exception propagates.
UnicodeConverter::UnicodeConverter(const std::string & encoding) {
    // Create ICU converter instance.
    {
        UErrorCode error_code = U_ZERO_ERROR;
        UConverter * converter = ucnv_open(encoding.c_str(), &error_code);

        if (U_FAILURE(error_code))
            throw std::runtime_error(u_errorName(error_code));

        if (!converter)
            throw std::runtime_error("ucnv_open(" + encoding + ") failed");

        converter_ = converter;
    }

    // Fill/detect some signature/BOM info.
    try {
        // UTF-16/UCS-2/UTF-32/UCS-4 without byte order meta-info (*LE/*BE), are considered to be in local byte order by this driver!
        // Note, that UCS-2 and UCS-4 with byte order meta-info are not recognised by ICU.
        // BOM bytes on encodings with fully specified byte order meta-info are accepted.

        // For those encodings for which force_non_empty_signature_to_prepend_detection is false,
        // encoded_signature_to_prepend_ will be chosen only if the converter generates one itself.
        bool force_non_empty_signature_to_prepend_detection = true;

        // Treating plain UTF-16/UTF-32 as if they are in the native byte-order.
        if (sameEncoding(encoding, "UTF-1")) {
            force_non_empty_signature_to_prepend_detection = false;
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0xF7, 0x64, 0x4C }));
        }
        else if (sameEncoding(encoding, "UTF-7")) {
            force_non_empty_signature_to_prepend_detection = false;
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0x2B, 0x2F, 0x76, 0x38, 0x2D })); // ...this should come first.
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0x2B, 0x2F, 0x76, 0x38 }));
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0x2B, 0x2F, 0x76, 0x39 }));
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0x2B, 0x2F, 0x76, 0x2B }));
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0x2B, 0x2F, 0x76, 0x2F }));
        }
        else if (sameEncoding(encoding, "UTF-8")) {
            force_non_empty_signature_to_prepend_detection = false;
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0xEF, 0xBB, 0xBF }));
        }
        else if (sameEncoding(encoding, "UTF-EBCDIC")) {
            force_non_empty_signature_to_prepend_detection = false;
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0xDD, 0x73, 0x66, 0x73 }));
        }
        else if (
            sameEncoding(encoding, "UTF-16BE") || (sameEncoding(encoding, "UTF-16") && !isLittleEndian()) ||
            sameEncoding(encoding, "UCS-2BE") || (sameEncoding(encoding, "UCS-2") && !isLittleEndian())
        ) {
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0xFE, 0xFF }));
        }
        else if (
            sameEncoding(encoding, "UTF-16LE") || (sameEncoding(encoding, "UTF-16") && isLittleEndian()) ||
            sameEncoding(encoding, "UCS-2LE") || (sameEncoding(encoding, "UCS-2") && isLittleEndian())
        ) {
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0xFF, 0xFE }));
        }
        else if (
            sameEncoding(encoding, "UTF-32BE") || (sameEncoding(encoding, "UTF-32") && !isLittleEndian()) ||
            sameEncoding(encoding, "UCS-4BE") || (sameEncoding(encoding, "UCS-4") && !isLittleEndian())
        ) {
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0x00, 0x00, 0xFE, 0xFF }));
        }
        else if (
            sameEncoding(encoding, "UTF-32LE") || (sameEncoding(encoding, "UTF-32") && isLittleEndian()) ||
            sameEncoding(encoding, "UCS-4LE") || (sameEncoding(encoding, "UCS-4") && isLittleEndian())
        ) {
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0xFF, 0xFE, 0x00, 0x00 }));
        }
        else if (sameEncoding(encoding, "SCSU")) {
            force_non_empty_signature_to_prepend_detection = false;
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0x0E, 0xFE, 0xFF }));
        }
        else if (sameEncoding(encoding, "BOCU-1")) {
            force_non_empty_signature_to_prepend_detection = false;
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0xFB, 0xEE, 0x28 }));
        }
        else if (sameEncoding(encoding, "GB-18030")) {
            force_non_empty_signature_to_prepend_detection = false;
            // The GB 18030 signature is 84 31 95 33, all four bytes hexadecimal.
            encoded_signatures_to_trim_.push_back(make_raw_str({ 0x84, 0x31, 0x95, 0x33 }));
        }

        // Detect the signature generated by the converter. Inspired by the code of ICU's uconv utility.

        std::basic_string<UChar> pivot;
        pivot.push_back(UChar(0xFEFF)); // Pivot's signature/BOM. May cause duplicate BOM on the output (which is tolerable here), but also may result in outputting BOM in cases when there wouldn't be one otherwise.
        pivot.push_back(UChar(0x61)); // 'a'

        // Encodes the pivot (BOM + 'a') with converter_ into a buffer of DestinationCharType units,
        // then asks ICU which leading bytes of the result, if any, form a Unicode signature.
        // The outcome (possibly empty) is stored in encoded_signature_to_prepend_.
        auto detect_signature = [&] (auto char_tag) {
            using DestinationCharType = std::decay_t<decltype(char_tag)>;

            std::basic_string<DestinationCharType> dest;
            dest.resize(128); // This should be big enough to store the biggest possible signature + encoded 'a' of any known encoding.

            {
                UErrorCode error_code = U_ZERO_ERROR;

                auto * source = pivot.c_str();
                auto * source_end = pivot.c_str() + pivot.size();

                // dest is non-empty here, so &dest[0] is a valid pointer to writable storage
                // (the array returned by c_str() must not be modified).
                auto * target = reinterpret_cast<char *>(&dest[0]);
                auto * target_end = reinterpret_cast<const char *>(dest.c_str() + dest.size());
                auto * target_prev = target;

                ucnv_fromUnicode(converter_, &target, target_end, &source, source_end, nullptr, true, &error_code);

                // Fail if conversion reported an error, did not consume the whole pivot, ran past the buffer,
                // or produced a byte count that is not a whole number of destination characters.
                if (
                    U_FAILURE(error_code) ||
                    source != source_end ||
                    target_end < target ||
                    ((target - target_prev) % sizeof(DestinationCharType)) != 0
                ) {
                    throw std::runtime_error("unable to detect signature: helper ucnv_fromUnicode() failed");
                }

                dest.resize((target - target_prev) / sizeof(DestinationCharType));
            }

            {
                UErrorCode error_code = U_ZERO_ERROR;
                std::int32_t signature_length = 0; // in bytes

                const auto * charset_name = ucnv_detectUnicodeSignature(reinterpret_cast<const char *>(dest.c_str()), dest.size() * sizeof(DestinationCharType), &signature_length, &error_code);

                // Keep only the signature bytes; the signature must be strictly shorter than the whole output
                // (which also contains the encoded 'a') and aligned to the destination character size.
                if (
                    U_SUCCESS(error_code) &&
                    charset_name != nullptr &&
                    signature_length > 0 &&
                    signature_length < (dest.size() * sizeof(DestinationCharType)) &&
                    signature_length % sizeof(DestinationCharType) == 0
                ) {
                    dest.resize(signature_length / sizeof(DestinationCharType));
                }
                else {
                    dest.clear();
                }
            }

            encoded_signature_to_prepend_.assign(reinterpret_cast<const char *>(dest.c_str()), dest.size() * sizeof(DestinationCharType));
        };

        // The destination character type is picked from the value returned by getEncodedMinCharSize().
        switch (getEncodedMinCharSize()) {
            case 1: detect_signature(char{});     break;
            case 2: detect_signature(char16_t{}); break;
            case 4: detect_signature(char32_t{}); break;
            default: throw std::runtime_error("unable to detect signature: unable to choose a character type for the converter");
        }

        if (!encoded_signature_to_prepend_.empty()) {
            // Make sure the signature the converter generates is also among those being trimmed.
            auto it = std::find(encoded_signatures_to_trim_.begin(), encoded_signatures_to_trim_.end(), encoded_signature_to_prepend_);
            if (it == encoded_signatures_to_trim_.end())
                encoded_signatures_to_trim_.push_back(encoded_signature_to_prepend_);
        }
        else if (
            force_non_empty_signature_to_prepend_detection &&
            !encoded_signatures_to_trim_.empty()
        ) {
            // The converter generated no signature: fall back to the first known one.
            encoded_signature_to_prepend_ = encoded_signatures_to_trim_.front();
        }

        for (auto & signature : encoded_signatures_to_trim_) {
            if (signature.size() > encoded_signatures_to_trim_max_size_)
                encoded_signatures_to_trim_max_size_ = signature.size();
        }

        // Pivot is hardcoded UTF-16, see converter_pivot_wide_char_encoding.
        pivot_signature_to_prepend_ = (isLittleEndian() ? make_raw_str({ 0xFF, 0xFE }) : make_raw_str({ 0xFE, 0xFF }));
        pivot_signatures_to_trim_.push_back(pivot_signature_to_prepend_);
        pivot_signatures_to_trim_max_size_ = pivot_signature_to_prepend_.size();
    }
    catch (...) {
        // The destructor is not invoked for an object whose constructor exits via an exception,
        // so the converter opened above has to be closed here, otherwise it would leak.
        ucnv_close(converter_);
        converter_ = nullptr;
        throw;
    }
}

// Closes the ICU converter held in converter_, if any, and resets the pointer.
UnicodeConverter::~UnicodeConverter() {
    if (!converter_)
        return;

    ucnv_close(converter_);
    converter_ = nullptr;
}

#endif
